In [ ]:
import os
import rasterio
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
In [ ]:
# Set folder and read TIF files
folder = "data_m/sentinel-2"  # Corrected typo in path
tifs = sorted([os.path.join(folder, f) for f in os.listdir(folder) if f.endswith("tif")])
b8, *stack = tifs
In [ ]:
b8
Out[ ]:
'data_m/sentinel-2\\B02.tif'
In [ ]:
# Read and plot high-resolution raster
with rasterio.open(b8) as src:
    highres = src.read(1)
plt.imshow(highres, cmap='turbo')
plt.show()
No description has been provided for this image
In [ ]:
def read_bands_sentinel2(directory):
    bands = []
    band_files = ['B02.tif', 'B03.tif', 'B04.tif', 'B05.tif', 'B06.tif', 'B07.tif', 'B08.tif', 'B8A.tif']
    for band_file in band_files:
        file_path = f"{directory}/{band_file}"
        with rasterio.open(file_path) as dataset:
            bands.append(dataset.read(1))
    return bands

# Sentinel-2 bands
sentinel2_bands = read_bands_sentinel2("data_m/sentinel-2")
In [ ]:
 
In [ ]:
import numpy as np

# Visualization function for Sentinel-2
def visualize_sentinel2(sentinel2_bands, upper_percentile):
    # Standard RGB composite for Sentinel-2: B04 (Red), B03 (Green), B02 (Blue)
    rgb = np.stack([sentinel2_bands[2], sentinel2_bands[1], sentinel2_bands[0]], axis=-1)
    max_val = np.percentile(rgb, upper_percentile * 100)
    rgb_normalized = np.clip(rgb, 0, max_val) / max_val
    plt.imshow(rgb_normalized)
    plt.axis('off')
    plt.show()

visualize_sentinel2(sentinel2_bands, 0.90)
No description has been provided for this image
In [ ]:
sentinel2_bands[0]
Out[ ]:
array([[1964, 1433, 1276, ..., 1324, 1295, 1288],
       [1977, 1463, 1295, ..., 1287, 1272, 1284],
       [2006, 1514, 1286, ..., 1277, 1276, 1262],
       ...,
       [1188, 1196, 1201, ..., 1127, 1110, 1160],
       [1191, 1202, 1196, ..., 1117, 1135, 1143],
       [1219, 1227, 1207, ..., 1112, 1143, 1142]], dtype=uint16)
In [ ]:
def brightness(sentinel2):
    # Placeholder coefficients for Sentinel-2 brightness index
    b2, b3, b4, b8 = sentinel2[:4]  # Using first four bands as an example
    return b2 * 0.3 + b3 * 0.3 + b4 * 0.3 + b8 * 0.1

def greenness(sentinel2):
    # Placeholder coefficients for Sentinel-2 greenness index
    b2, b3, b4, b8 = sentinel2[:4]  # Using first four bands as an example
    return b3 * 0.5 - b2 * 0.2 - b4 * 0.2 - b8 * 0.1

def wetness(sentinel2):
    # Placeholder coefficients for Sentinel-2 wetness index
    b2, b3, b4, b8 = sentinel2[:4]  # Using first four bands as an example
    return b8 * 0.5 - b3 * 0.2 - b2 * 0.2 - b4 * 0.1

def mndwi(sentinel2):
    green, nir = sentinel2[1], sentinel2[6]  # B03 (Green) and B08 (NIR)
    return (green - nir) / (green + nir + 1e-10)

def ndbi(sentinel2):
    red, nir = sentinel2[2], sentinel2[6]  # B04 (Red) and B08 (NIR)
    return (nir - red) / (nir + red + 1e-10)

def ndvi(sentinel2):
    nir, red = sentinel2[6], sentinel2[2]  # B08 (NIR) and B04 (Red)
    return (nir - red) / (nir + red + 1e-10)

def savi(sentinel2, L=0.5):
    nir, red = sentinel2[6], sentinel2[2]  # B08 (NIR) and B04 (Red)
    return ((nir - red) / (nir + red + L + 1e-10)) * (1 + L)
In [ ]:
from rasterio.enums import Resampling
import rasterio

def resample_band(band, target_shape):
    # Adjust band data to target shape
    data = band.read(
        out_shape=target_shape,
        resampling=Resampling.bilinear
    )
    return data

# Define the path to your Sentinel-2 bands
folder = "data_m/sentinel-2"
tifs = sorted([os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".tif")])

# Assuming you want to resample to the shape of the first band
with rasterio.open(tifs[0]) as first_band:
    target_shape = first_band.shape

# Resample all bands
resampled_bands = []
for tif in tifs:
    with rasterio.open(tif) as band:
        resampled_data = resample_band(band, target_shape=target_shape)
        resampled_bands.append(resampled_data)
In [ ]:
sentinel2_bands = resampled_bands
In [ ]:
# List of functions
functions = [mndwi, ndbi, ndvi, savi, brightness, greenness, wetness]

# Apply each function to the landsat data
# This will create a list of tuples with the function name and its output raster
smrs = [(func.__name__, func(sentinel2_bands)) for func in functions]

# If you need a dictionary instead of a list of tuples:
smrs_dict = {func.__name__: func(sentinel2_bands) for func in functions}
In [ ]:
import numpy as np

# Reshape the resampled bands
resampled_bands_2d = [np.squeeze(band) for band in resampled_bands]

# Check the shape again
print("Shape after squeezing:", resampled_bands_2d[0].shape)

# Try plotting again
plt.imshow(resampled_bands_2d[1], cmap='turbo')  # Plotting the second band
plt.colorbar()
plt.title('Example Resampled Band')
plt.show()
Shape after squeezing: (2341, 2928)
No description has been provided for this image
In [ ]:
# Normalize the band data for visualization
normalized_band = (resampled_bands_2d[1] - resampled_bands_2d[1].min()) / (resampled_bands_2d[1].max() - resampled_bands_2d[1].min())

plt.imshow(normalized_band, cmap='turbo')
plt.colorbar()
plt.title('Normalized Resampled Band')
plt.show()
No description has been provided for this image
In [ ]:
# Squeeze the 'greenness_raster' to ensure it is 2D
greenness_raster_2d = np.squeeze(greenness_raster)

# Now let's recalculate the percentiles on the squeezed array
vmin, vmax = np.percentile(greenness_raster_2d, [5, 95])  # Adjust the percentiles as needed

# Choose a different colormap
cmap = plt.cm.viridis

# Plot the greenness index with the adjusted color scale and colormap
plt.imshow(greenness_raster_2d, cmap=cmap, vmin=vmin, vmax=vmax)
plt.colorbar()
plt.title('Greenness Index')
plt.show()
No description has been provided for this image
In [ ]:
# Apply Laplacian of Gaussian (LoG) and plot the subsection of the filtered image
sigma = 3
out2 = gaussian_laplace(highres, sigma=sigma)
plt.imshow(out2, cmap='turbo')  # Remove the slicing to see the full image
plt.colorbar()
plt.title('Laplacian of Gaussian Filtered Image')
plt.show()
No description has been provided for this image
In [ ]:
# Resample and create the combined raster stack
with rasterio.open(b8) as src:  # Assuming b8 is still the file path to the B08 band
    affine = src.transform
    crs = src.crs
    target_shape = src.shape

# Assuming `smrs` is a list of tuples with (name, raster) for Sentinel-2
crst = sentinel2_sr + [band for name, band in smrs]
In [ ]:
crst
Out[ ]:
[array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        ...,
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
        [-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
         -3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
 array([[-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        ...,
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
 array([[-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        ...,
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
 array([[-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        ...,
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
 array([[-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        ...,
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.],
        [-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
 array([[-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
         -3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
        [-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
         -3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
        [-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
         -3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
        ...,
        [-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
         -3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
        [-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
         -3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
        [-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
         -3.4000002e+38, -3.4000002e+38, -3.4000002e+38]], dtype=float32),
 array([[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        ...,
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30]], dtype=float32),
 array([[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        ...,
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30],
        [2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
         2.5353012e+30, 2.5353012e+30]], dtype=float32)]
In [ ]:
# Assuming landsat is a list of numpy arrays for each band
landsat_sr = [dn_to_reflectance(band) for band in landsat]
# Now you can replace infinite values with 0.0 for each band
landsat_sr = [np.where(np.isinf(band), 0.0, band) for band in landsat_sr]
In [ ]:
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling

# Define the function to convert digital numbers to reflectance for Sentinel-2
def sentinel_dn_to_reflectance(array, band_number, scale_factor, add_constant):
    # The scale factor and add_constant values should be retrieved from the metadata
    # For the sake of this example, we're using placeholders
    # Sentinel-2's scale factor is generally 0.0001 for reflectance bands
    return array * scale_factor + add_constant

# Read and process Sentinel-2 bands
sentinel2_bands = read_bands_sentinel2("data_m/sentinel-2")

# Convert Sentinel-2 bands to reflectance
# You will need to look up the correct scale factors and add_constants for each band
scale_factors = [0.0001] * len(sentinel2_bands)  # Replace with correct values from metadata
add_constants = [0.0] * len(sentinel2_bands)    # Replace with correct values from metadata

sentinel2_sr = [sentinel_dn_to_reflectance(band, i, scale_factors[i], add_constants[i]) 
                for i, band in enumerate(sentinel2_bands)]

# Replace infinite values with 0.0 for each band
sentinel2_sr = [np.where(np.isinf(band), 0.0, band) for band in sentinel2_sr]

# Assuming `smrs` is a list of tuples with (name, raster) for Sentinel-2
# Replace 'landsat_sr' with 'sentinel2_sr' to create the combined raster stack
crst = sentinel2_sr + [band for name, band in smrs]

# Assuming b8 is the file path to the B08 band which is typically used for high-resolution analysis
with rasterio.open(b8) as src:
    affine = src.transform
    crs = src.crs
    target_transform = src.transform  # Assuming b8 has the transform we want to match
    target_shape = src.shape
In [ ]:
crst
Out[ ]:
[array([[0.1964, 0.1433, 0.1276, ..., 0.1324, 0.1295, 0.1288],
        [0.1977, 0.1463, 0.1295, ..., 0.1287, 0.1272, 0.1284],
        [0.2006, 0.1514, 0.1286, ..., 0.1277, 0.1276, 0.1262],
        ...,
        [0.1188, 0.1196, 0.1201, ..., 0.1127, 0.111 , 0.116 ],
        [0.1191, 0.1202, 0.1196, ..., 0.1117, 0.1135, 0.1143],
        [0.1219, 0.1227, 0.1207, ..., 0.1112, 0.1143, 0.1142]]),
 array([[0.231 , 0.1711, 0.1446, ..., 0.1696, 0.1694, 0.1684],
        [0.2408, 0.17  , 0.1481, ..., 0.1678, 0.1662, 0.1658],
        [0.2406, 0.1766, 0.1497, ..., 0.1656, 0.1656, 0.1648],
        ...,
        [0.1345, 0.1404, 0.1365, ..., 0.1316, 0.1281, 0.1277],
        [0.1332, 0.1344, 0.1357, ..., 0.1276, 0.129 , 0.1302],
        [0.1373, 0.1383, 0.1414, ..., 0.1284, 0.1294, 0.13  ]]),
 array([[0.2626, 0.1728, 0.1302, ..., 0.1297, 0.1294, 0.1273],
        [0.2728, 0.1682, 0.1313, ..., 0.1275, 0.1277, 0.1274],
        [0.2732, 0.18  , 0.1352, ..., 0.126 , 0.1252, 0.1268],
        ...,
        [0.1183, 0.1177, 0.1179, ..., 0.1149, 0.1154, 0.1171],
        [0.1183, 0.1186, 0.1186, ..., 0.1151, 0.1187, 0.1162],
        [0.1207, 0.1213, 0.1218, ..., 0.1149, 0.1178, 0.1164]]),
 array([[0.2901, 0.1922, 0.1587, ..., 0.2276, 0.2253, 0.2242],
        [0.3016, 0.1862, 0.1635, ..., 0.2096, 0.2118, 0.209 ],
        [0.2931, 0.203 , 0.1651, ..., 0.208 , 0.209 , 0.2095],
        ...,
        [0.1652, 0.1676, 0.1629, ..., 0.16  , 0.1487, 0.1434],
        [0.1609, 0.1592, 0.1582, ..., 0.1649, 0.1671, 0.1585],
        [0.1626, 0.1635, 0.1672, ..., 0.1555, 0.1558, 0.1543]]),
 array([[0.3325, 0.3737, 0.3844, ..., 0.4647, 0.4765, 0.4754],
        [0.3344, 0.3735, 0.3951, ..., 0.5097, 0.5301, 0.5272],
        [0.3275, 0.3748, 0.3989, ..., 0.5057, 0.5215, 0.5254],
        ...,
        [0.3439, 0.3449, 0.3459, ..., 0.338 , 0.2901, 0.2536],
        [0.3425, 0.3312, 0.315 , ..., 0.3686, 0.3328, 0.3005],
        [0.3411, 0.3417, 0.3418, ..., 0.3081, 0.3013, 0.2982]]),
 array([[0.3697, 0.4739, 0.5222, ..., 0.5294, 0.556 , 0.5763],
        [0.359 , 0.4785, 0.5366, ..., 0.6067, 0.6305, 0.6301],
        [0.3456, 0.4775, 0.5266, ..., 0.5901, 0.6252, 0.6387],
        ...,
        [0.403 , 0.4104, 0.3919, ..., 0.4121, 0.3258, 0.2892],
        [0.3872, 0.3911, 0.3851, ..., 0.4185, 0.4135, 0.3657],
        [0.3914, 0.403 , 0.4041, ..., 0.3544, 0.3452, 0.3397]]),
 array([[0.374 , 0.4548, 0.5104, ..., 0.618 , 0.6104, 0.6008],
        [0.3688, 0.4544, 0.5176, ..., 0.6172, 0.6168, 0.6148],
        [0.3648, 0.456 , 0.5212, ..., 0.6244, 0.6268, 0.6308],
        ...,
        [0.3894, 0.4204, 0.3976, ..., 0.4344, 0.357 , 0.3104],
        [0.3954, 0.4028, 0.4096, ..., 0.347 , 0.3676, 0.32  ],
        [0.4068, 0.4082, 0.4264, ..., 0.3162, 0.3584, 0.331 ]]),
 array([[0.3828, 0.4992, 0.542 , ..., 0.5731, 0.5838, 0.5867],
        [0.3881, 0.4936, 0.5592, ..., 0.6404, 0.6662, 0.6715],
        [0.3758, 0.4844, 0.5518, ..., 0.6358, 0.6524, 0.6625],
        ...,
        [0.4368, 0.4194, 0.43  , ..., 0.4393, 0.3731, 0.3202],
        [0.4172, 0.4136, 0.3967, ..., 0.4508, 0.415 , 0.3864],
        [0.4243, 0.4361, 0.4307, ..., 0.3875, 0.3743, 0.3661]]),
 array([[[10.59603306, 10.01741492,  9.4470229 , ...,  7.75165058,
           7.83867658,  7.95787832],
         [10.54068241, 10.04035874,  9.28961995, ...,  7.77605096,
           7.79438059,  7.82039457],
         [10.62008589,  9.91811571,  9.21463705, ...,  7.71493671,
           7.68854114,  7.65158371],
         ...,
         [12.02271426, 11.18687589, 11.78150159, ..., 11.04381625,
          13.03793032, 14.54211367],
         [11.9020053 , 11.69992554, 11.51604621, ..., 13.34639697,
          12.71647201, 14.13549534],
         [11.54953134, 11.49807868, 11.04015498, ..., 14.31803869,
          12.96555966, 13.78004338]]]),
 array([[[0.17499215, 0.44933078, 0.59350609, ..., 0.65306941,
          0.65017572, 0.65032276],
         [0.14962594, 0.45968519, 0.59531515, ..., 0.65758023,
          0.65695097, 0.65669631],
         [0.14357367, 0.43396226, 0.58805606, ..., 0.6641791 ,
          0.66702128, 0.66525871],
         ...,
         [0.53397676, 0.56253484, 0.54258002, ..., 0.58164937,
          0.51143099, 0.45216374],
         [0.53941989, 0.54507096, 0.55092768, ..., 0.50183943,
          0.51182398, 0.46721687],
         [0.54236967, 0.54183192, 0.55563663, ..., 0.46694502,
          0.5052499 , 0.47966026]]]),
 array([[[0.17499215, 0.44933078, 0.59350609, ..., 0.65306941,
          0.65017572, 0.65032276],
         [0.14962594, 0.45968519, 0.59531515, ..., 0.65758023,
          0.65695097, 0.65669631],
         [0.14357367, 0.43396226, 0.58805606, ..., 0.6641791 ,
          0.66702128, 0.66525871],
         ...,
         [0.53397676, 0.56253484, 0.54258002, ..., 0.58164937,
          0.51143099, 0.45216374],
         [0.53941989, 0.54507096, 0.55092768, ..., 0.50183943,
          0.51182398, 0.46721687],
         [0.54236967, 0.54183192, 0.55563663, ..., 0.46694502,
          0.5052499 , 0.47966026]]]),
 array([[[0.2624676 , 0.67394248, 0.89018965, ..., 0.97953862,
          0.97519768, 0.97541715],
         [0.22442141, 0.68947242, 0.89290392, ..., 0.98630413,
          0.98536028, 0.98497811],
         [0.21534363, 0.65089223, 0.88201691, ..., 0.99620228,
          1.00046539, 0.99782221],
         ...,
         [0.80088626, 0.84372387, 0.8137911 , ..., 0.87239465,
          0.7670653 , 0.6781663 ],
         [0.80905109, 0.81752805, 0.8263133 , ..., 0.7526777 ,
          0.76765704, 0.70074499],
         [0.8134774 , 0.81267114, 0.83337893, ..., 0.70033631,
          0.75779528, 0.71940999]]]),
 array([[[2360.1, 1727.2, 1423.9, ..., 1520.1, 1509.4, 1497.7],
         [2426.9, 1720.9, 1443. , ..., 1493.5, 1484.1, 1485.2],
         [2441.9, 1795. , 1456. , ..., 1472.5, 1468.6, 1466.2],
         ...,
         [1276.1, 1294.2, 1284. , ..., 1240.2, 1222.7, 1239.9],
         [1274. , 1281.8, 1284.1, ..., 1221. , 1239.8, 1237.5],
         [1302.3, 1309.7, 1315. , ..., 1218.9, 1239.2, 1236.1]]]),
 array([[[-53.1, -42.3,  -9.3, ...,  98.8, 104.7, 105.6],
         [-30. , -46.4,   2.6, ..., 105.1, 100.4,  97. ],
         [-43.3, -50.8,   5.4, ..., 106. , 109. , 105.2],
         ...,
         [ 37. ,  66.3,  46. , ...,  40.2,  28.5,  14.8],
         [ 29. ,  32.2,  39.7, ...,  26.6,  24.4,  34.6],
         [ 38.7,  40.7,  58.7, ...,  34.4,  28.1,  34.5]]]),
 array([[[333.1, 526.4, 408.9, ..., 391.3, 395.3, 399.3],
         [315.2, 536.2, 395. , ..., 387. , 389.5, 386.2],
         [337.9, 519. , 385.7, ..., 360.4, 355.4, 355.2],
         ...,
         [181.6, 167.8, 171.4, ..., 209.5, 202.4, 183. ],
         [188.1, 183.2, 182.8, ..., 195.3, 177.3, 171.8],
         [173.9, 170.7, 170.5, ..., 182.9, 168.3, 166.7]]])]
In [ ]:
# Function to resample an array to match the target raster
def resample_to_raster(src_array, src_transform, src_crs, target_raster_path):
    with rasterio.open(target_raster_path) as target_raster:
        target_shape = target_raster.shape
        target_transform = target_raster.transform
        target_crs = target_raster.crs

        resampled_array = np.empty(target_shape, dtype=src_array.dtype)
        reproject(
            source=src_array,
            destination=resampled_array,
            src_transform=src_transform,
            src_crs=src_crs,
            dst_transform=target_transform,
            dst_crs=target_crs,
            resampling=Resampling.bilinear
        )
        return resampled_array

# Assuming crst is a list of numpy arrays representing the combined raster stack
# Define paths to training and validation set rasters
train_raster_path = os.path.join("data_m/munich_training.tif")
val_raster_path = os.path.join("data_m/munich_validation.tif")
In [ ]:
# Open the first raster in sentinel2_bands to get crs and transform
with rasterio.open(b8) as src:  # Assuming you want to use B02 for crs and transform
    src_transform = src.transform
    src_crs = src.crs

# Resample your stack to the training and validation datasets
trainrs = [resample_to_raster(raster, src_transform, src_crs, train_raster_path) for raster in crst]
valrs = [resample_to_raster(raster, src_transform, src_crs, val_raster_path) for raster in crst]
In [ ]:
valrs
Out[ ]:
[array([[0.15356918, 0.18619078, 0.21632533, ..., 0.        , 0.        ,
         0.        ],
        [0.15996372, 0.18228472, 0.20241405, ..., 0.        , 0.        ,
         0.        ],
        [0.17756013, 0.18499236, 0.17683913, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.16763321, 0.19904838, 0.19861306, ..., 0.        , 0.        ,
         0.        ],
        [0.15188736, 0.16774144, 0.16060217, ..., 0.        , 0.        ,
         0.        ],
        [0.12164528, 0.12859054, 0.12780843, ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.17233257, 0.20223479, 0.23958778, ..., 0.        , 0.        ,
         0.        ],
        [0.18619048, 0.20579693, 0.22255609, ..., 0.        , 0.        ,
         0.        ],
        [0.20616019, 0.21201495, 0.20929842, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.18307601, 0.19629549, 0.20731164, ..., 0.        , 0.        ,
         0.        ],
        [0.16875808, 0.17551845, 0.17535673, ..., 0.        , 0.        ,
         0.        ],
        [0.14366143, 0.15133531, 0.15135189, ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.17475415, 0.21418065, 0.24274322, ..., 0.        , 0.        ,
         0.        ],
        [0.18589872, 0.21669778, 0.23965327, ..., 0.        , 0.        ,
         0.        ],
        [0.2142721 , 0.22097837, 0.20410899, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.17100312, 0.19422466, 0.19845509, ..., 0.        , 0.        ,
         0.        ],
        [0.15645082, 0.16641619, 0.16262694, ..., 0.        , 0.        ,
         0.        ],
        [0.12881286, 0.13739064, 0.1358647 , ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.3674605 , 0.3435625 , 0.26316939, ..., 0.        , 0.        ,
         0.        ],
        [0.28069858, 0.26028224, 0.22475733, ..., 0.        , 0.        ,
         0.        ],
        [0.22989208, 0.22338268, 0.22031715, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.39066329, 0.37364253, 0.35338183, ..., 0.        , 0.        ,
         0.        ],
        [0.36439798, 0.34903737, 0.34226339, ..., 0.        , 0.        ,
         0.        ],
        [0.35202646, 0.3522999 , 0.3587746 , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.40650246, 0.39234175, 0.38610653, ..., 0.        , 0.        ,
         0.        ],
        [0.39558502, 0.38585678, 0.38821847, ..., 0.        , 0.        ,
         0.        ],
        [0.3875339 , 0.38248344, 0.39315632, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.30985396, 0.30146089, 0.343658  , ..., 0.        , 0.        ,
         0.        ],
        [0.37149817, 0.34317773, 0.34984341, ..., 0.        , 0.        ,
         0.        ],
        [0.39669522, 0.39020706, 0.39714359, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.31319273, 0.32184773, 0.34604266, ..., 0.        , 0.        ,
         0.        ],
        [0.31544681, 0.32553554, 0.32265193, ..., 0.        , 0.        ,
         0.        ],
        [0.34762168, 0.37586748, 0.36707344, ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.41323875, 0.40598401, 0.40695634, ..., 0.        , 0.        ,
         0.        ],
        [0.40930351, 0.40053686, 0.41095442, ..., 0.        , 0.        ,
         0.        ],
        [0.41364292, 0.41470789, 0.41992427, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ]]),
 array([[13.401633  , 12.92216089, 11.14805342, ...,  0.        ,
          0.        ,  0.        ],
        [11.48559297, 11.7594568 , 11.25912044, ...,  0.        ,
          0.        ,  0.        ],
        [10.55857546, 10.59144262, 10.49773871, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [13.15631855, 12.47885467, 11.65207668, ...,  0.        ,
          0.        ,  0.        ],
        [13.2758039 , 12.80507447, 12.92077035, ...,  0.        ,
          0.        ,  0.        ],
        [12.95558034, 12.04102709, 12.25940703, ...,  0.        ,
          0.        ,  0.        ]]),
 array([[0.27884161, 0.16849986, 0.17116097, ..., 0.        , 0.        ,
         0.        ],
        [0.33039098, 0.22417862, 0.18560495, ..., 0.        , 0.        ,
         0.        ],
        [0.29971925, 0.27785492, 0.32241209, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.2952175 , 0.24948777, 0.26908283, ..., 0.        , 0.        ,
         0.        ],
        [0.33872611, 0.32644652, 0.33340774, ..., 0.        , 0.        ,
         0.        ],
        [0.45839707, 0.4631294 , 0.45794567, ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.27884161, 0.16849986, 0.17116097, ..., 0.        , 0.        ,
         0.        ],
        [0.33039098, 0.22417862, 0.18560495, ..., 0.        , 0.        ,
         0.        ],
        [0.29971925, 0.27785492, 0.32241209, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.2952175 , 0.24948777, 0.26908283, ..., 0.        , 0.        ,
         0.        ],
        [0.33872611, 0.32644652, 0.33340774, ..., 0.        , 0.        ,
         0.        ],
        [0.45839707, 0.4631294 , 0.45794567, ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.41821884, 0.25272525, 0.25671959, ..., 0.        , 0.        ,
         0.        ],
        [0.49554219, 0.33623796, 0.27838393, ..., 0.        , 0.        ,
         0.        ],
        [0.44954192, 0.41674814, 0.48357774, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.4427795 , 0.37419478, 0.4035873 , ..., 0.        , 0.        ,
         0.        ],
        [0.50803488, 0.48961945, 0.50005929, ..., 0.        , 0.        ,
         0.        ],
        [0.68752342, 0.69462648, 0.68685031, ..., 0.        , 0.        ,
         0.        ]]),
 array([[1714.15914624, 2020.78022215, 2322.0051347 , ...,    0.        ,
            0.        ,    0.        ],
        [1814.46177141, 2033.68098747, 2223.2570349 , ...,    0.        ,
            0.        ,    0.        ],
        [2033.58796385, 2094.12347237, 2014.33166583, ...,    0.        ,
            0.        ,    0.        ],
        ...,
        [1753.58904921, 1960.79321291, 2008.6562132 , ...,    0.        ,
            0.        ,    0.        ],
        [1612.91162565, 1717.58213304, 1688.93630121, ...,    0.        ,
            0.        ,    0.        ],
        [1357.50924432, 1434.08049557, 1431.57090066, ...,    0.        ,
            0.        ,    0.        ]]),
 array([[-7.17523299, -2.53051898, 53.76568205, ...,  0.        ,
          0.        ,  0.        ],
        [20.9245266 , 11.6769757 , -0.7409797 , ...,  0.        ,
          0.        ,  0.        ],
        [ 7.52579398,  7.96689003, 41.00378592, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [49.65539476,  2.84371525, 46.90509594, ...,  0.        ,
          0.        ,  0.        ],
        [45.49117435, 20.72311273, 37.14663244, ...,  0.        ,
          0.        ,  0.        ],
        [42.24030341, 42.58320086, 42.91734008, ...,  0.        ,
          0.        ,  0.        ]]),
 array([[234.39957515,  73.77623146, -24.38871135, ...,   0.        ,
           0.        ,   0.        ],
        [213.30787057, 103.8524491 ,  57.34040169, ...,   0.        ,
           0.        ,   0.        ],
        [216.34076353, 185.83914267, 241.57628802, ...,   0.        ,
           0.        ,   0.        ],
        ...,
        [ 69.83848219, -24.47420188, -32.72040495, ...,   0.        ,
           0.        ,   0.        ],
        [110.37261708,  89.83350752, 131.3491711 , ...,   0.        ,
           0.        ,   0.        ],
        [216.32641271, 213.41261753, 238.29387495, ...,   0.        ,
           0.        ,   0.        ]])]
In [ ]:
# Write out the resampled data
with rasterio.open(train_raster_path) as src:
    profile = src.profile
    profile.update(dtype=rasterio.float32, count=len(trainrs))
    with rasterio.open('trainbands_sentinel_munich.tif', 'w', **profile) as dst:
        for i, array in enumerate(trainrs, start=1):
            dst.write(array.astype(rasterio.float32), i)

with rasterio.open(val_raster_path) as src:
    profile = src.profile
    profile.update(dtype=rasterio.float32, count=len(valrs))
    with rasterio.open('valbands_sentinel_munich.tif', 'w', **profile) as dst:
        for i, array in enumerate(valrs, start=1):
            dst.write(array.astype(rasterio.float32), i)
In [ ]:
def rasters_to_dataframe(raster_paths, class_raster_path):
    # Read class raster
    with rasterio.open(class_raster_path) as class_raster:
        class_data = class_raster.read(1).flatten()
    
    data = {'class': class_data}
    
    for i, raster_path in enumerate(raster_paths):
        with rasterio.open(raster_path) as raster:
            raster_data = raster.read(1).flatten()
            band_name = f'band_{i+1}'
            data[band_name] = raster_data
    
    df = pd.DataFrame(data)
    return df

# Define paths to training and validation set rasters
train_raster_path = os.path.join("data_m/munich_training.tif")
val_raster_path = os.path.join("data_m/munich_validation.tif")
In [ ]:
# Open the first raster in crst to get crs and transform
with rasterio.open(b8) as src:  # Assuming b8 is a file path to a raster in crst
    src_transform = src.transform
    src_crs = src.crs

# Resample our stack to the training and validation datasets
trainrs = [resample_to_raster(raster, src_transform, src_crs, train_raster_path) for raster in crst]
valrs = [resample_to_raster(raster, src_transform, src_crs, val_raster_path) for raster in crst]
In [ ]:
# Write out the resampled data
with rasterio.open(train_raster_path) as src:
    profile = src.profile
    profile.update(dtype=rasterio.float32, count=len(trainrs))
    with rasterio.open('trainbands_sentinel.tif', 'w', **profile) as dst:
        for i, array in enumerate(trainrs, start=1):
            dst.write(array.astype(rasterio.float32), i)

with rasterio.open(val_raster_path) as src:
    profile = src.profile
    profile.update(dtype=rasterio.float32, count=len(valrs))
    with rasterio.open('valbands_sentinel.tif', 'w', **profile) as dst:
        for i, array in enumerate(valrs, start=1):
            dst.write(array.astype(rasterio.float32), i)
In [ ]:
# Transform the RasterStack into a DataFrame
# Here we need to flatten the raster arrays and combine them into a dataframe
# This can be memory intensive and might need optimization for large rasters

# Helper function to flatten and combine rasters into a dataframe
def rasters_to_dataframe(rasters, class_raster_path):
    # Read class raster
    with rasterio.open(class_raster_path) as class_raster:
        class_data = class_raster.read(1).flatten()
    
    data = {'class': class_data}
    for i, raster in enumerate(rasters):
        data[f'band_{i+1}'] = raster.flatten()
    
    df = pd.DataFrame(data)
    return df

# Create dataframes for training and validation
train_df = rasters_to_dataframe(trainrs, train_raster_path)
val_df = rasters_to_dataframe(valrs, val_raster_path)
In [ ]:
train_df
Out[ ]:
class band_1 band_2 band_3 band_4 band_5 band_6 band_7 band_8 band_9 band_10 band_11 band_12 band_13 band_14 band_15
0 0 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1 0 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2 0 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
3 0 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
4 0 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7620391 0 0.122571 0.146188 0.128295 0.0 0.0 0.0 0.514778 0.0 9.357737 0.600985 0.600985 0.901408 1353.875209 66.497524 147.741836
7620392 0 0.123020 0.145848 0.127469 0.0 0.0 0.0 0.516879 0.0 9.329054 0.604347 0.604347 0.906450 1352.562503 64.711552 152.540054
7620393 0 0.123262 0.144189 0.127049 0.0 0.0 0.0 0.515817 0.0 9.366624 0.604743 0.604743 0.907044 1347.913460 55.913481 160.105623
7620394 0 0.121898 0.144145 0.126898 0.0 0.0 0.0 0.514479 0.0 9.388263 0.604293 0.604293 0.906369 1343.701400 58.257346 165.394981
7620395 0 0.120907 0.144483 0.126878 0.0 0.0 0.0 0.512640 0.0 9.413135 0.603202 0.603202 0.904732 1341.801145 61.849239 167.330073

7620396 rows × 16 columns

In [ ]:
# Combine training and validation dataframes
combined_df = pd.concat([train_df, val_df], ignore_index=True)

# Drop rows where class is 0 and rows with infinite values
#combined_df = combined_df[(combined_df['class'] != 0) & ~np.isinf(combined_df).any(axis=1)]

# Drop 'X' and 'Y' if they exist in the dataframe
combined_df = combined_df.drop(columns=['X', 'Y'], errors='ignore')

combined_df
Out[ ]:
class band_1 band_2 band_3 band_4 band_5 band_6 band_7 band_8 band_9 band_10 band_11 band_12 band_13 band_14 band_15
0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
10820336 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
10820337 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
10820338 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
10820339 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
10820340 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

10820341 rows × 16 columns

In [ ]:
from sklearn.preprocessing import LabelEncoder

# Assuming 'combined_df' is your DataFrame and it includes a 'class' column
# Plot a histogram of the 'class' column
plt.hist(combined_df['class'], bins='auto')  # 'auto' lets matplotlib decide the number of bins
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Frequency')
plt.show()

# Convert the 'class' column to a numerical format if it's categorical
if combined_df['class'].dtype == 'object':
    le = LabelEncoder()
    combined_df['class'] = le.fit_transform(combined_df['class'])

# Describe the DataFrame to check for Inf, NaNs, or other odd patterns
description = combined_df.describe(include='all')  # include='all' to get statistics for all columns
print(description)

# Print out the schema of the DataFra
combined_df.dtypes
No description has been provided for this image
              class        band_1        band_2        band_3        band_4  \
count  1.082034e+07  1.082034e+07  1.082034e+07  1.082034e+07  1.082034e+07   
mean   6.296555e-01  1.374456e-01  1.575983e-01  1.541511e-01  4.622750e-02   
std    7.745073e-01  6.067584e-02  6.745948e-02  7.305796e-02  9.070470e-02   
min    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   
25%    0.000000e+00  1.223063e-01  1.418066e-01  1.253014e-01  0.000000e+00   
50%    0.000000e+00  1.400209e-01  1.650636e-01  1.542141e-01  0.000000e+00   
75%    1.000000e+00  1.663457e-01  1.896646e-01  1.937141e-01  0.000000e+00   
max    2.000000e+00  1.956800e+00  1.843200e+00  1.771200e+00  1.736900e+00   

             band_5        band_6        band_7        band_8        band_9  \
count  1.082034e+07  1.082034e+07  1.082034e+07  1.082034e+07  1.082034e+07   
mean   7.080163e-02  7.920400e-02  3.213029e-01  8.398045e-02  1.068471e+01   
std    1.378557e-01  1.550626e-01  1.361907e-01  1.644535e-01  4.308195e+00   
min    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   
25%    0.000000e+00  0.000000e+00  2.858879e-01  0.000000e+00  1.020321e+01   
50%    0.000000e+00  0.000000e+00  3.423224e-01  0.000000e+00  1.169260e+01   
75%    0.000000e+00  0.000000e+00  4.020957e-01  0.000000e+00  1.305446e+01   
max    1.709000e+00  1.694475e+00  1.688000e+00  1.676200e+00  2.822862e+01   

            band_10       band_11       band_12       band_13       band_14  \
count  1.082034e+07  1.082034e+07  1.082034e+07  1.082034e+07  1.082034e+07   
mean   4.384063e-01  4.384063e-01  6.575357e-01  1.539329e+03  1.305443e+01   
std    1.457378e+00  1.457378e+00  2.185665e+00  6.751483e+02  4.525055e+01   
min    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00 -2.256087e+03   
25%    1.493996e-01  1.493996e-01  2.240770e-01  1.346875e+03 -5.723911e+00   
50%    3.335757e-01  3.335757e-01  5.003158e-01  1.589444e+03  1.420160e+01   
75%    4.873635e-01  4.873635e-01  7.309766e-01  1.879604e+03  3.851817e+01   
max    3.077051e+01  3.077051e+01  4.614493e+01  1.839159e+04  3.180503e+03   

            band_15  
count  1.082034e+07  
mean   2.144795e+02  
std    1.322087e+02  
min   -5.754183e+03  
25%    1.490908e+02  
50%    2.239966e+02  
75%    2.964437e+02  
max    7.018870e+03  
Out[ ]:
class        uint8
band_1     float64
band_2     float64
band_3     float64
band_4     float64
band_5     float64
band_6     float64
band_7     float64
band_8     float64
band_9     float64
band_10    float64
band_11    float64
band_12    float64
band_13    float64
band_14    float64
band_15    float64
dtype: object
In [ ]:
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, make_scorer
from xgboost import XGBClassifier
import numpy as np

# Assuming 'combined_df' is the DataFrame with your data and 'class' is the target column
y = combined_df['class'].values
#y = combined_df['class'].values - 1  # Subtract 1 to make classes start from 0
X = combined_df.drop(columns=['class']).values

# Initialize the XGBoost classifier
tree = XGBClassifier(random_state=123)  # Tree is a reserved keyword in Python, so we use 'tree' variable

# Define the cross-validation strategy
cv_strategy = StratifiedKFold(n_splits=6, shuffle=True, random_state=123)

# Define scoring metrics
scoring_metrics = {
    'balanced_accuracy': make_scorer(balanced_accuracy_score),
    'kappa': make_scorer(cohen_kappa_score)
}

# Evaluate the model using cross-validation
pe = cross_validate(tree, X, y, cv=cv_strategy, scoring=scoring_metrics, verbose=1)

# Print the measurements
print(f"Balanced Accuracy: {np.mean(pe['test_balanced_accuracy'])}")
print(f"Cohen's Kappa: {np.mean(pe['test_kappa'])}")
Balanced Accuracy: 0.615085366767569
Cohen's Kappa: 0.4607336151786576
In [ ]:
# Check the shapes of all rasters in crst
raster_shapes = [raster.shape for raster in crst]
if not all(shape == raster_shapes[0] for shape in raster_shapes):
    print("Not all rasters have the same shape:", raster_shapes)
else:
    print("All rasters have the same shape:", raster_shapes[0])

    # Assuming all rasters are now confirmed to be of the same shape
    dfn = pd.DataFrame({f'band_{i}': raster.flatten() for i, raster in enumerate(crst)})
Not all rasters have the same shape: [(2341, 2928), (2341, 2928), (2341, 2928), (1171, 1464), (1171, 1464), (1171, 1464), (2341, 2928), (1171, 1464), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928)]
In [ ]:
from rasterio.warp import reproject, calculate_default_transform

def resize_raster_array(array, src_transform, src_crs, target_shape, target_bounds):
    # Calculate the new transform
    new_transform, width, height = calculate_default_transform(
        src_crs, src_crs, target_shape[1], target_shape[0], *target_bounds)
    
    # Set up an empty array for the resampled data
    resampled_array = np.empty((height, width), dtype=array.dtype)

    # Perform the resampling
    reproject(
        source=array,
        destination=resampled_array,
        src_transform=src_transform,
        src_crs=src_crs,
        dst_transform=new_transform,
        dst_crs=src_crs,
        resampling=Resampling.nearest
    )
    
    return resampled_array

# Assuming 'crst' is your list of rasters (numpy arrays)
# and 'b8' is the file path of a raster with the correct size
with rasterio.open(b8) as src:
    target_shape = src.shape
    src_transform = src.transform
    src_crs = src.crs
    target_bounds = src.bounds

# Resize rasters in 'crst' if needed
resized_crst = [resize_raster_array(raster, src_transform, src_crs, target_shape, target_bounds) if raster.shape != target_shape else raster for raster in crst]

# Convert resized_crst to a DataFrame
dfn = pd.DataFrame({f'band_{i}': raster.flatten() for i, raster in enumerate(resized_crst)})
In [ ]:
pred_reshaped
Out[ ]:
array([[2, 2, 1, ..., 0, 0, 0],
       [2, 2, 2, ..., 0, 0, 0],
       [2, 2, 2, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])
In [ ]:
# Assuming 'crst' is a list of rasters (numpy arrays)
# Convert the list of rasters to a DataFrame
#dfn = pd.DataFrame({f'band_{i}': raster.flatten() for i, raster in enumerate(crst)})

# Replace any non-finite values (like Inf) with 0
dfn = dfn.replace([np.inf, -np.inf], 0)

# Train the model
tree.fit(X, y)  # X and y should be your training data and labels

# Now you can make predictions
pred = tree.predict(dfn)

nodata_value = -9999  # You can choose any valid int32 value as the nodata value

# Assuming 'srs' is one of the original rasters and has the same spatial dimensions as 'crst'
with rasterio.open(b8) as src:  # Assuming b8 is a path to one of your rasters
    profile = src.profile
    profile.update(dtype=rasterio.int32, count=1, nodata=nodata_value)

    # Reshape the prediction array to match the spatial dimensions and write to a new raster file
    pred_reshaped = pred.reshape(src.shape).astype(np.int32)
    with rasterio.open('prediction.tif', 'w', **profile) as dst:
        dst.write(pred_reshaped, 1)

plt.imshow(pred_reshaped, cmap='viridis')  # Change colormap as needed
plt.colorbar()
plt.title('Prediction Raster')
plt.show()
No description has been provided for this image
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Create a new figure and axis object
fig, ax = plt.subplots()

# Define the colors for each class
class_colors = ['yellow', 'green', 'red']  # Class 0 = yellow, 1 = green, 2 = red

# Create a ListedColormap object based on the class colors
cmap = ListedColormap(class_colors)

# Assuming 'pred_reshaped' is your 2D array of predictions
# Replace any non-finite values (like Inf) with the nodata value
pred_reshaped = np.where(np.isfinite(pred_reshaped), pred_reshaped, nodata_value)

# Mask the nodata values so they are not colored in the plot
masked_predictions = np.ma.masked_where(pred_reshaped == nodata_value, pred_reshaped)

# Plot the masked predictions array with the custom colormap
cbar = ax.imshow(masked_predictions, cmap=cmap)

# Optionally, create a colorbar with labels
colorbar_labels = ['Class 0', 'Class 1', 'Class 2']
cbar = fig.colorbar(cbar, ticks=[0, 1, 2])
cbar.ax.set_yticklabels(colorbar_labels)

# Set the title
ax.set_title('Prediction Raster')

# Display the plot
plt.show()
No description has been provided for this image
In [ ]:
import matplotlib.pyplot as plt

# Your visualization function remains unchanged
def visualize_sentinel2(sentinel2_bands, upper_percentile, ax):
    # Standard RGB composite for Sentinel-2: B04 (Red), B03 (Green), B02 (Blue)
    rgb = np.stack([sentinel2_bands[2], sentinel2_bands[1], sentinel2_bands[0]], axis=-1)
    max_val = np.percentile(rgb, upper_percentile * 100)
    rgb_normalized = np.clip(rgb, 0, max_val) / max_val
    ax.imshow(rgb_normalized)
    ax.axis('off')  # Turn off axis

# Create a figure with two subplots, one above the other
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12))  # Adjust figsize as needed

# Plot the original Sentinel-2 image in the first subplot
visualize_sentinel2(sentinel2_bands, 0.90, ax1)

# Plot the prediction raster in the second subplot
# Assuming 'pred_reshaped' is your 2D numpy array with prediction classes
class_colors = ['yellow', 'green', 'red']  # Class 0 = yellow, 1 = green, 2 = red
cmap = ListedColormap(class_colors)
masked_predictions = np.ma.masked_where(pred_reshaped == nodata_value, pred_reshaped)
cbar = ax2.imshow(masked_predictions, cmap=cmap)
fig.colorbar(cbar, ax=ax2, ticks=[0, 1, 2], fraction=0.036, pad=0.04)  # Adjust colorbar size as needed
ax2.set_title('Prediction Raster')

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
No description has been provided for this image
In [ ]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Define the bounds of the subset you want to extract
start_row, start_col = 1000, 1000  # for example
end_row, end_col = 1500, 1500     # for example

# Adjust the visualization function to plot a subset
def visualize_sentinel2_subset(sentinel2_bands, upper_percentile, ax, bounds):
    # Standard RGB composite for Sentinel-2: B04 (Red), B03 (Green), B02 (Blue)
    rgb = np.stack([sentinel2_bands[2], sentinel2_bands[1], sentinel2_bands[0]], axis=-1)
    # Crop the RGB composite image
    rgb_subset = rgb[bounds[0]:bounds[2], bounds[1]:bounds[3], :]
    max_val = np.percentile(rgb_subset, upper_percentile * 100)
    rgb_normalized = np.clip(rgb_subset, 0, max_val) / max_val
    ax.imshow(rgb_normalized)
    ax.axis('off')  # Turn off axis

# Create a figure with two subplots, one above the other
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12))  # Adjust figsize as needed

# Plot the subset of the original Sentinel-2 image in the first subplot
visualize_sentinel2_subset(sentinel2_bands, 0.90, ax1, (start_row, start_col, end_row, end_col))

# Plot the subset of the prediction raster in the second subplot
class_colors = ['yellow', 'green', 'red']  # Class 0 = yellow, 1 = green, 2 = red
cmap = ListedColormap(class_colors)

# Crop the prediction raster to the same subset bounds
pred_subset = pred_reshaped[start_row:end_row, start_col:end_col]

masked_predictions = np.ma.masked_where(pred_subset == nodata_value, pred_subset)
cbar = ax2.imshow(masked_predictions, cmap=cmap)
fig.colorbar(cbar, ax=ax2, ticks=[0, 1, 2], fraction=0.036, pad=0.04)  # Adjust colorbar size as needed
ax2.set_title('Prediction Raster Subset')

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
No description has been provided for this image
In [ ]: